<?php
/**
 * Freelancer.com Scraper
 * Fetches latest freelance projects
 */

// Include database
require_once __DIR__ . '/../app/core/db.php';

/*
 * Script flow:
 *   1. Download every RSS feed listed in $rssFeeds.
 *   2. For each <item>, derive an external ID from the project link.
 *   3. Skip items already stored for source 'freelancer'; insert the rest
 *      into the `projects` table.
 *   4. Print a summary; any exception prints its message and exits with 1.
 */
echo "Starting Freelancer.com Scraper...\n";

try {
    // Get database connection (getDbConnection() is provided by app/core/db.php).
    // NOTE(review): the handle is used like a PDO instance (prepare/execute/fetch) — confirm.
    $db = getDbConnection();

    // Freelancer RSS feed URLs for different categories
    $rssFeeds = [
        'https://www.freelancer.com/rss.xml',
    ];

    $totalInserted = 0;
    $totalSkipped = 0;

    // The request options are the same for every feed, so build the context once.
    // An explicit timeout keeps a stalled feed from blocking the run for the
    // (potentially much longer) default_socket_timeout.
    $context = stream_context_create([
        'http' => [
            'header' => "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36\r\n",
            'timeout' => 30,
        ],
    ]);

    // Both statements are loop-invariant: prepare them once and re-execute per item.
    $existsStmt = $db->prepare("SELECT id FROM projects WHERE external_id = ? AND source = 'freelancer' LIMIT 1");
    $insertStmt = $db->prepare("
                INSERT INTO projects (
    source, external_id, title, description, 
    budget, project_url, published_at, 
    status, created_at
) VALUES (
    'freelancer', ?, ?, ?, ?, ?, ?, 'active', NOW()
)
            ");

    foreach ($rssFeeds as $feedUrl) {
        echo "Fetching from: $feedUrl\n";

        // Fetch RSS feed; the warning is suppressed because failure is reported below.
        $xml = @file_get_contents($feedUrl, false, $context);

        if ($xml === false) {
            echo "⚠️ Failed to fetch feed\n";
            continue;
        }

        // Parse XML; malformed documents are reported below instead of raising warnings.
        $rss = @simplexml_load_string($xml);

        if ($rss === false) {
            echo "⚠️ Failed to parse XML\n";
            continue;
        }

        $items = $rss->channel->item ?? [];
        echo "Found " . count($items) . " items\n";

        foreach ($items as $item) {
            $title = (string)$item->title;
            $link = (string)$item->link;
            $description = (string)$item->description;
            $pubDate = (string)$item->pubDate;

            // Extract the numeric project ID from the link; when the link does not
            // match the expected ".../projects/<category>/<id>" shape, fall back to
            // a hash of the link so the item still gets a stable identifier.
            $matches = [];
            preg_match('/projects\/[^\/]+\/(\d+)/', $link, $matches);
            $externalId = $matches[1] ?? md5($link);

            // Check if already exists
            $existsStmt->execute([$externalId]);

            if ($existsStmt->fetch()) {
                $totalSkipped++;
                continue;
            }

            // Parse published date. strtotime() returns false for a missing or
            // unparseable value; without this guard date() would silently store
            // 1970-01-01 00:00:00, so fall back to the current time instead.
            $timestamp = strtotime($pubDate);
            $publishedAt = date('Y-m-d H:i:s', $timestamp !== false ? $timestamp : time());

            // Clean description (remove HTML tags, decode entities, trim).
            // Flags and charset are spelled out so the result does not depend on
            // the PHP version's html_entity_decode() defaults.
            $cleanDesc = strip_tags($description);
            $cleanDesc = html_entity_decode($cleanDesc, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');
            $cleanDesc = trim($cleanDesc);

            // Extract the first dollar amount from the description, if any.
            // Thousands separators must be complete three-digit groups and may
            // repeat, so "$1,000,000.50" yields "1000000.50" and "$10,5" yields "10".
            $budget = null;
            $budgetMatch = [];
            if (preg_match('/\$(\d+(?:,\d{3}(?!\d))*(?:\.\d{2})?)/', $description, $budgetMatch)) {
                $budget = str_replace(',', '', $budgetMatch[1]);
            }

            // Insert into database
            $insertStmt->execute([
                $externalId,
                $title,
                $cleanDesc,
                $budget,
                $link,
                $publishedAt,
            ]);

            $totalInserted++;
        }
    }

    echo "\n✅ Freelancer scraping completed!\n";
    echo "   - Inserted: $totalInserted new projects\n";
    echo "   - Skipped: $totalSkipped existing projects\n";

} catch (Exception $e) {
    echo "\n❌ Error: " . $e->getMessage() . "\n";
    exit(1);
}